Introduction

Some review of the subject and the list of hypotheses discussed at meetings.

Table 1

# Table 1: descriptive summary of every column in `diamonds`, rendered
# through kableone(). includeNA = TRUE asks tableone to treat missing
# values as their own level for categorical variables.
diamonds %>%
  tableone::CreateTableOne(
    data = .,
    includeNA = TRUE,
    # strata = "visit",
    # NOTE: per the tableone docs, addOverall is only used when `strata`
    # is supplied; it is kept here so that enabling the stratified table
    # above also yields the Overall column.
    addOverall = TRUE
  ) %>%
  tableone::kableone()
Overall
n 53940
carat (mean (SD)) 0.80 (0.47)
cut (%)
Fair 1610 ( 3.0)
Good 4906 ( 9.1)
Very Good 12082 (22.4)
Premium 13791 (25.6)
Ideal 21551 (40.0)
color (%)
D 6775 (12.6)
E 9797 (18.2)
F 9542 (17.7)
G 11292 (20.9)
H 8304 (15.4)
I 5422 (10.1)
J 2808 ( 5.2)
clarity (%)
I1 741 ( 1.4)
SI2 9194 (17.0)
SI1 13065 (24.2)
VS2 12258 (22.7)
VS1 8171 (15.1)
VVS2 5066 ( 9.4)
VVS1 3655 ( 6.8)
IF 1790 ( 3.3)
depth (mean (SD)) 61.75 (1.43)
table (mean (SD)) 57.46 (2.23)
price (mean (SD)) 3932.80 (3989.44)
x (mean (SD)) 5.73 (1.12)
y (mean (SD)) 5.73 (1.14)
z (mean (SD)) 3.54 (0.71)

Basic descriptive characteristics

Distribution of price by color

# Overlaid density curves of `price`, one semi-transparent fill per level
# of `color`. Labels describe the variables that are actually plotted
# (the earlier "Age" / "Male x Female" wording did not match the data).
diamonds %>%
  ggplot(aes(x = price, fill = color)) +
  geom_density(alpha = 0.3) +
  labs(
    title = "Price by Color",
    x = "Price",
    y = "Density"
  ) +
  theme_linedraw()

Missing values, categories, and distributions in one picture

# Draw a table plot of the whole `diamonds` data set with tabplot.
tabplot::tableplot(diamonds)

Are any variables correlated?

# Scatter-plot matrix of the numeric columns with Pearson correlations.
# Uses base is.numeric: `is_numeric` was a deprecated purrr helper (removed
# in purrr 1.0.0), so the original predicate fails on current installs.
diamonds %>%
  select(where(is.numeric)) %>%
  psych::pairs.panels(
    method = "pearson", # correlation method
    hist.col = "#00AFBB", # fill colour of the diagonal histograms
    density = TRUE, # show density plots
    ellipses = TRUE # show correlation ellipses
  )

More info on distribution with boxplots

# One horizontal boxplot per numeric column. The data are reshaped to long
# form (`ind` = column name, `values` = observation) so that a single
# ggplot call can draw every variable.
# pivot_longer()/where() replace the superseded gather()/select_if().
# The former scale_fill_grey() was dropped: no fill aesthetic is mapped,
# so it had no effect on the figure.
diamonds %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "ind",
    values_to = "values"
  ) %>%
  ggplot(aes(x = ind, y = values)) +
  geom_boxplot() +
  coord_flip() +
  theme_minimal()

System information

# Print the project.info object (its config, packages and helpers entries).
# NOTE(review): presumably populated by ProjectTemplate when the project is
# loaded; confirm against the setup chunk.
project.info
$config
$config$version
[1] "0.10.2"

$config$data_loading
[1] TRUE

$config$data_loading_header
[1] TRUE

$config$data_ignore
[1] ""

$config$cache_loading
[1] TRUE

$config$recursive_loading
[1] FALSE

$config$munging
[1] TRUE

$config$logging
[1] FALSE

$config$logging_level
[1] "INFO"

$config$load_libraries
[1] TRUE

$config$libraries
[1] "dtplyr"

$config$as_factors
[1] FALSE

$config$tables_type
[1] "data.table"

$config$attach_internal_libraries
[1] FALSE

$config$cache_loaded_data
[1] TRUE

$config$sticky_variables
[1] "NONE"

$config$underscore_variables
[1] TRUE

$config$cache_file_format
[1] "RData"


$packages
[1] "dtplyr"

$helpers
[1] "pclean.R"
# Record the R version, platform and attached/loaded package versions so
# the analysis environment can be reproduced.
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.1

Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_0.5.1          stringr_1.4.0          dplyr_1.0.7            purrr_0.3.4           
 [5] readr_2.1.1            tidyr_1.1.4            ggplot2_3.3.5          tidyverse_1.3.1       
 [9] dtplyr_1.2.0           ProjectTemplate_0.10.2 tibble_3.1.6           digest_0.6.29         

loaded via a namespace (and not attached):
 [1] httr_1.4.2        viridisLite_0.4.0 jsonlite_1.7.2    splines_4.1.2     ffbase_0.13.3    
 [6] tmvnsim_1.0-2     here_1.0.1        modelr_0.1.8      assertthat_0.2.1  highr_0.9        
[11] cellranger_1.1.0  yaml_2.2.1        pillar_1.6.4      backports_1.4.1   lattice_0.20-45  
[16] glue_1.6.0        rvest_1.0.2       colorspace_2.0-2  htmltools_0.5.2   Matrix_1.3-4     
[21] survey_4.1-1      psych_2.1.9       pkgconfig_2.0.3   broom_0.7.11      labelled_2.9.0   
[26] haven_2.4.3       scales_1.1.1      tabplot_1.4.1     ff_4.0.5          tzdb_0.2.0       
[31] proxy_0.4-26      farver_2.1.0      generics_0.1.1    ellipsis_0.3.2    withr_2.4.3      
[36] cli_3.1.0         mnormt_2.0.2      survival_3.2-13   magrittr_2.0.1    crayon_1.4.2     
[41] readxl_1.3.1      evaluate_0.14     fs_1.5.2          fansi_0.5.0       nlme_3.1-153     
[46] class_7.3-19      xml2_1.3.3        tableone_0.13.0   rsconnect_0.8.25  tools_4.1.2      
[51] data.table_1.14.2 hms_1.1.1         mitools_2.4       lifecycle_1.0.1   munsell_0.5.0    
[56] reprex_2.0.1      compiler_4.1.2    e1071_1.7-9       rlang_0.4.12      grid_4.1.2       
[61] rstudioapi_0.13   labeling_0.4.2    rmarkdown_2.11    gtable_0.3.0      DBI_1.1.2        
[66] R6_2.5.1          zoo_1.8-9         lubridate_1.8.0   knitr_1.37        fastmap_1.1.0    
[71] bit_4.0.4         utf8_1.2.2        fastmatch_1.1-3   rprojroot_2.0.2   stringi_1.7.6    
[76] parallel_4.1.2    Rcpp_1.0.7        vctrs_0.3.8       dbplyr_2.1.1      tidyselect_1.1.1 
[81] xfun_0.29        

References

LS0tCnRpdGxlOiAiVGhlIHRpdGxlIgphdXRob3I6ICJhdXRob3IgbmFtZSIKZGF0ZTogInh4eHgiCm91dHB1dDoKICB3b3JkX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKICBodG1sX25vdGVib29rOgogICAgY29kZV9mb2xkaW5nOiBoaWRlCiAgICBoaWdobGlnaHQ6IHplbmJ1cm4KICAgIHRoZW1lOiBmbGF0bHkKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICBjb2RlX2Rvd25sb2FkaW5nOiB5ZXMKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB5ZXMKICAgIGRmX3ByaW50OiBwYWdlZAogIHBkZl9kb2N1bWVudDoKICAgIHRvYzogeWVzCmFic3RyYWN0OiBUaGlzIGlzIGFuIGFic3RyYWN0Cm51bWJlci1zZWN0aW9uczogeWVzCmxhbmc6IGVuCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQoKCnNldC5zZWVkKDQyKQpzZXR3ZChoZXJlOjpoZXJlKCkpICMgbmVlZGVkIGFzIHdlIGFyZSBpbiAvc3JjLCBpbiBsaW51eCBoZXJlKCkgc2hvdWxkIGJlIHVzZWQKCiMgTG9hZGluZyB0aGUgcHJvamVjdApQcm9qZWN0VGVtcGxhdGU6OmxvYWQucHJvamVjdCgpCnBjbGVhbigpCgojIFIgb3B0aW9ucwpvcHRpb25zKAogIGRpZ2l0cyA9IDIsICMgT25seSB0d28gZGVjaW1hbCBkaWdpdHMKICBzY2lwZW4gPSA5OTkgIyBSZW1vdmUgc2NpZW50aWZpYyBub3RhdGlvbiBmb3IgcHJldHR5IHByaW50aW5nCikKCiMgS25pdHIgb3B0aW9ucwprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgY29tbWVudCA9IE5BLCAjIHJlbW92ZSBjb21tZW50IHN5bWJvbAogIGNhY2hlLnBhdGggPSAiLi4vY2FjaGUvIiwgIyB3aGVyZSBzaG91bGQgSSBzYXZlIGNhY2hlPwogIGZpZy5wYXRoID0gIi4uL2dyYXBocy8iLCAjIHdoZXJlIHNob3VsZCBJIHNhdmUgZmlndXJlcz8KICBlY2hvID0gVCwgIyBkb250IGVjaG8gYnkgZGVmYXVsdAogIGNhY2hlID0gRiwgIyBkb250IGNhY2hlIGJ5IGRlZmF1bHQKICBmaWcud2lkdGggPSAxMCwgIyBzZXR0aW5nIHRoZSBiZXN0IHdpdGR0aCBmb3IgZmlndXJlcwogIGZpZy5oZWlnaHQgPSA3LCAjIGJlc3QgaGVpZ2h0CiAgZHBpID0gMzAwLCAjIGhpZ2ggZHBpIGZvciBwdWJsaWNhdGlvbiBxdWFsaXR5CiAgZXJyb3IgPSBGLAogIHdhcm5pbmcgPSBGCikKYGBgCgoKIyBJbnRyb2R1Y3Rpb24KClNvbWUgcmV2aWV3IG9mIHRoZSBzdWJqZWN0IGFuZCB0aGUgbGlzdCBvZiBoeXBvdGhlc2VzIGRpc2N1c3NlZCBhdCBtZWV0aW5ncy4KCiMgVGFibGUgMQoKCmBgYHtyfQpkaWFtb25kcyAlPiUKICB0YWJsZW9uZTo6Q3JlYXRlVGFibGVPbmUoCiAgICBkYXRhID0gLiwKICAgIGluY2x1ZGVOQSA9IFQsCiAgICAjIHN0cmF0YSA9ICJ2aXNpdCIsCiAgICBhZGRPdmVyYWxsID0gVAogICkgJT4lCiAgdGFibGVvbmU6OmthYmxlb25lKCkKYGBgCgoKIyBCYXNpYyBkZXNjcmlwdGl2ZSBjaGFyYWN0ZXJpc3RpY3MKCiMjIERpc3RyaWJ1dGlvbiBvZiBhZ2UgYnkg
c2V4CgpgYGB7ciBhZ2UtYnktc2V4LCByZXN1bHRzPSJoaWRlIn0KZGlhbW9uZHMgJT4lCiAgZ2dwbG90KGFlcyhwcmljZSwgZmlsbCA9IGNvbG9yKSkgKwogIGdlb21fZGVuc2l0eShhbHBoYSA9IC4zKSArCiAgbGFicygKICAgIHRpdGxlID0gIkFnZSBNYWxlIHggRmVtYWxlIiwgeCA9ICJBZ2UiLAogICAgeSA9ICJEZW5zaXR5IgogICkgKwogIHRoZW1lX2xpbmVkcmF3KCkKYGBgCgoKCiMjIE1pc3NpbmcgYW5kIGNhdGVnb3JpZXMgYW5kIGRpc3RyaWJ1dGlvbiBpbiBvbmUgcGljdHVyZQpgYGB7ciB0YWJwbG90LCBmaWcuY2FwPSJNaXNzaW5ncywgY2F0ZWdvcmllcyBhbmQgZGlzdHJpYnV0aW9ucyJ9CnRhYnBsb3Q6OnRhYmxlcGxvdChkaWFtb25kcykKYGBgCgojIyBBbnkgY29ycmVsYXRlZD8KCgpgYGB7ciBzY2F0dGVyfQpkaWFtb25kcyAlPiUKICBzZWxlY3RfaWYoaXNfbnVtZXJpYykgJT4lCiAgcHN5Y2g6OnBhaXJzLnBhbmVscyguLAogICAgbWV0aG9kID0gInBlYXJzb24iLCAjIGNvcnJlbGF0aW9uIG1ldGhvZAogICAgaGlzdC5jb2wgPSAiIzAwQUZCQiIsCiAgICBkZW5zaXR5ID0gVFJVRSwgIyBzaG93IGRlbnNpdHkgcGxvdHMKICAgIGVsbGlwc2VzID0gVFJVRSAjIHNob3cgY29ycmVsYXRpb24gZWxsaXBzZXMKICApCmBgYAoKCiMjIE1vcmUgaW5mbyBvbiBkaXN0cmlidXRpb24gd2l0aCBib3hwbG90cwoKYGBge3IgYm94cGxvdHN9CmRpYW1vbmRzICU+JQogIHNlbGVjdF9pZihpcy5udW1lcmljKSAlPiUKICBnYXRoZXIoa2V5ID0gImluZCIsIHZhbHVlID0gInZhbHVlcyIpICU+JQogIGdncGxvdChhZXMoeCA9IGluZCwgeSA9IHZhbHVlcykpICsKICBnZW9tX2JveHBsb3QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHNjYWxlX2ZpbGxfZ3JleSgpCmBgYAoKCgojIFN5c3RlbSBpbmZvcm1hdGlvbgoKYGBge3J9CnByb2plY3QuaW5mbwpgYGAKCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoKIyBSZWZlcmVuY2VzCg==